
* Session setup: start from an empty session and turn off output paging so
* the script can run without pausing.
clear all
clear mata
clear matrix
set more off
* NOTE(review): scheme modern does not appear to be a built-in Stata scheme;
* presumably it is user-installed and must be on the adopath -- confirm.
set scheme modern

*set path here*
* Fill in a directory after each = sign below before running the script.
* In the visible part of this file, source .dta files are read from the
* rawdata directory and intermediate and final .dta files are written to the
* workingdata directory. The match, temp and table globals are not referenced
* in the visible part of the file.

global rawdata = 
global workingdata = 
global match = 
global temp = 
global table = 

*****************
*clean CFPS data*
*****************

*****2010*****

***family2010***
* Build the 2010 family-level file: the 2010 family id plus one income
* variable. It is joined onto individuals by fid10 in the 2010 merge step.
* File paths are quoted and use forward slashes so they work on every
* platform and with directories that contain spaces.

use "$rawdata/cfps2010famecon_201906.dta", clear

rename fid fid10
gen hincome_per = indinc_net  // household per capita income
keep fid10 hincome_per

save "$workingdata/family2010.dta", replace

***adult2010***
* Clean the 2010 adult file and keep the analysis variables.
* Negative values are treated as missing, as in the other sections.

use "$rawdata/cfps2010adult_201906.dta", clear

gen year = 2010
rename fid fid10

rename cfps2010edu_best edulist
replace edulist = . if edulist <= 0  // education level
gen eduy = cfps2010eduy_best if cfps2010eduy_best >= 0  // years of education
* Age and gender are cleaned the same way as in the 2010 child file:
* negative codes become missing.
gen age = qa1age if qa1age >= 0  // age
replace gender = . if gender < 0  // gender
replace wordtest = . if wordtest < 0
replace mathtest = . if mathtest < 0  // cognitive test scores

* Work-location dummies. Both are 0 when qg404 is missing or holds any other
* code. outdoor is additionally switched on when qg4 equals 1.
gen indoor = inlist(qg404, 2, 3, 4, 5)
gen outdoor = inlist(qg404, 1, 6)
replace outdoor = 1 if qg4 == 1  // working indoor/outdoor

gen cooperation = qz206 if qz206 > 0  // cooperation

keep pid countyid fid10 provcd year ///
    edulist eduy age gender ///
    wordtest mathtest ///
    indoor outdoor cooperation

save "$workingdata/adult2010.dta", replace

***child2010***
* Clean the 2010 child file and keep the analysis variables.
* Children with neither a word test nor a math test score are dropped.

use "$rawdata/cfps2010child_201906.dta", clear

gen year = 2010
rename fid fid10

rename cfps2010edu_best edulist
replace edulist = . if edulist <= 0  // education level
gen eduy = cfps2010eduy_best if cfps2010eduy_best >= 0  // years of education
gen age = wa1age if wa1age >= 0  // age
replace gender = . if gender < 0  // gender

* Cognitive test scores: negative codes become missing.
replace wordtest = . if wordtest < 0
replace mathtest = . if mathtest < 0
drop if wordtest == . & mathtest == .  // cognitive test scores

gen cooperation = wz206 if wz206 > 0  // cooperation

keep pid fid10 provcd countyid year ///
    edulist eduy age gender ///
    wordtest mathtest ///
    cooperation

save "$workingdata/child2010.dta", replace

***append & merge***
* Stack the 2010 adult and child files, then join the family file by fid10
* and the date2010 and time2010 files by pid.

use "$workingdata/adult2010.dta", clear

append using "$workingdata/child2010.dta"

* unmatched(master) keeps every person, including those with no match in
* the using file.
joinby fid10 using "$workingdata/family2010.dta", unmatched(master)
drop _merge

joinby pid using "$rawdata/date2010.dta", unmatched(master)
drop _merge

joinby pid using "$rawdata/time2010.dta", unmatched(master)
drop _merge

* Interview day. The tabulation of the interview year is a visual check.
* NOTE(review): year 1919 is blanked out; presumably a placeholder date in
* the raw data -- confirm.
gen day = iwenddate
gen iwyear = year(day)
tab iwyear
replace day = . if iwyear == 1919
drop iwyear

save "$workingdata/data2010.dta", replace

*****2014*****

***family2014***
* Build the 2014 family-level file: family id, per capita income and an
* air-conditioner ownership dummy.
* File paths are quoted and use forward slashes so they work on every
* platform and with directories that contain spaces.

use "$rawdata/cfps2014famecon_201906.dta", clear

gen hincome_per = fincome2_per  // household per capita income

* AC is 1 when any of fs6_s_1 to fs6_s_15 equals 11, and 0 otherwise.
gen AC = 0
forvalues i = 1/15 {
    replace AC = 1 if fs6_s_`i' == 11  // AC ownership
}
keep fid14 hincome_per AC

save "$workingdata/family2014.dta", replace

***adult2014***
* Clean the 2014 adult file and keep the analysis variables.
* Negative codes are treated as missing.

use "$rawdata/cfps2014adult_201906.dta", clear

gen year = 2014
gen countyid = countyid14 if countyid14 > 0
gen provcd = provcd14 if provcd14 > 0

rename cfps2014edu edulist
replace edulist = . if edulist <= 0  // education level
gen eduy = cfps2014eduy_im if cfps2014eduy_im >= 0  // years of education
gen age = cfps2014_age if cfps2014_age >= 0  // age
gen gender = cfps_gender if cfps_gender >= 0  // gender

gen wordtest = wordtest14 if wordtest14 >= 0
gen mathtest = mathtest14 if mathtest14 >= 0  // cognitive test scores

* Work-location dummies. Both are 0 when qg20 is missing or holds any other
* code. outdoor is additionally switched on when qg101 equals 1.
gen indoor = inlist(qg20, 2, 3, 4, 5)
gen outdoor = inlist(qg20, 1, 6)
replace outdoor = 1 if qg101 == 1  // working indoor/outdoor

gen cooperation = qz206 if qz206 > 0  // cooperation
gen impatience = qz5 if qz5 > 0  // impatience

keep pid countyid fid10 fid12 fid14 provcd year ///
    edulist eduy age gender ///
    wordtest mathtest ///
    indoor outdoor cooperation impatience

save "$workingdata/adult2014.dta", replace

***child2014***
* Clean the 2014 child file and keep the analysis variables.
* Children with neither a word test nor a math test score are dropped.

use "$rawdata/cfps2014child_201906.dta", clear

gen year = 2014
rename provcd14 provcd
replace provcd = . if provcd < 0
rename countyid14 countyid
replace countyid = . if countyid < 0

rename cfps2014edu edulist
replace edulist = . if edulist <= 0  // education level
gen eduy = cfps2014eduy_im if cfps2014eduy_im >= 0  // years of education
gen age = cfps2014_age if cfps2014_age >= 0  // age
gen gender = cfps_gender if cfps_gender >= 0  // gender

gen wordtest = wordtest14 if wordtest14 >= 0
gen mathtest = mathtest14 if mathtest14 >= 0  // cognitive test scores

drop if wordtest == . & mathtest == .

gen cooperation = kz206_b_2 if kz206_b_2 > 0  // cooperation
gen impatience = kz5_b_2 if kz5_b_2 > 0  // impatience

keep pid fid14 fid12 fid10 provcd countyid year ///
    edulist eduy age gender ///
    wordtest mathtest ///
    cooperation impatience

save "$workingdata/child2014.dta", replace

***append & merge***
* Stack the 2014 adult and child files, then join the family file by fid14
* and the date2014 and time2014 files by pid.

use "$workingdata/adult2014.dta", clear

append using "$workingdata/child2014.dta"

joinby fid14 using "$workingdata/family2014.dta", unmatched(master)
drop _merge

* Persons with no match in date2014 are dropped here, unlike in the 2010
* merge where unmatched persons are kept.
joinby pid using "$rawdata/date2014.dta", unmatched(master)
drop if _merge == 1
drop _merge longform

joinby pid using "$rawdata/time2014.dta", unmatched(master)
drop _merge

* Interview day. The tabulation of the interview year is a visual check.
* NOTE(review): year 1919 is blanked out; presumably a placeholder date in
* the raw data -- confirm.
gen day = iwenddate
gen iwyear = year(day)
tab iwyear
replace day = . if iwyear == 1919
drop iwyear

save "$workingdata/data2014.dta", replace

*****2018*****

***family2018***
* Build the 2018 family-level file: family id plus per capita income.
* File paths are quoted and use forward slashes so they work on every
* platform and with directories that contain spaces.

use "$rawdata/cfps2018famecon_202101.dta", clear

gen hincome_per = fincome2_per  // household per capita income
keep fid18 hincome_per

save "$workingdata/family2018.dta", replace

***person2018***
* Clean the 2018 person file and keep the analysis variables.
* Negative codes are treated as missing.

use "$rawdata/cfps2018person_202012.dta", clear

gen year = 2018
gen provcd = provcd18 if provcd18 > 0

rename cfps2018edu edulist
replace edulist = . if edulist <= 0  // education level
gen eduy = cfps2018eduy if cfps2018eduy >= 0  // years of education
replace age = . if age < 0  // age
* The original gender variable is moved aside so that gender can be rebuilt
* from gender_update.
rename gender gendero
gen gender = gender_update if gender_update >= 0  // gender

gen wordtest = wordtest18 if wordtest18 >= 0
gen mathtest = mathtest18 if mathtest18 >= 0
* A word test score of 0 is set to missing when there is no math score.
replace wordtest = . if wordtest == 0 & mathtest == .  // cognitive test scores

* Work-location dummies. Both are 0 when qg20 is missing or holds any other
* code. outdoor is additionally switched on when qg101 equals 1.
gen indoor = inlist(qg20, 2, 3, 4, 5)
gen outdoor = inlist(qg20, 1, 6)
replace outdoor = 1 if qg101 == 1  // working indoor/outdoor

gen impatience = kz5 if kz5 > 0  // impatience

keep pid fid10 fid12 fid14 fid16 fid18 provcd year ///
    edulist eduy age gender ///
    wordtest mathtest ///
    indoor outdoor impatience

save "$workingdata/person2018.dta", replace

***append & merge***
* Join the 2018 family file by fid18 and the datetime2018 file by pid onto
* the cleaned 2018 person file.

use "$workingdata/person2018.dta", clear

joinby fid18 using "$workingdata/family2018.dta", unmatched(master)
drop _merge

joinby pid using "$rawdata/datetime2018.dta", unmatched(master)
drop _merge

* countyid18 is not kept in person2018, so it presumably comes from the
* datetime2018 file -- confirm.
rename countyid18 countyid
replace countyid = . if countyid < 0

* Interview day. The tabulation of the interview year is a visual check.
* NOTE(review): year 2012 is blanked out here, whereas the earlier waves
* blank out 1919; presumably a wave-specific bad date -- confirm.
gen day = iwenddate
gen iwyear = year(day)
tab iwyear
replace day = . if iwyear == 2012
drop iwyear

save "$workingdata/data2018.dta", replace

*****merge all the waves*****
* Stack the three wave files into one person-year panel, label education
* level, fill in missing years of education from the education level, and
* keep only observations with at least one cognitive test score.

use "$workingdata/data2010.dta", clear

append using "$workingdata/data2014.dta"
append using "$workingdata/data2018.dta"

* Missing education level gets its own category 9 so it can be labelled.
replace edulist = 9 if edulist == .
label define edulist 1 "文盲/半文盲" 2 "小学" 3 "初中" 4 "高中/中专/技校/职高" 5 "大专" 6 "大学本科" 7 "硕士" 8 "博士" 9 "missing"
label values edulist edulist

* Fill in years of education from the education level only where eduy is
* missing. Category 9 is left missing.
replace eduy = 0  if edulist == 1 & eduy == .
replace eduy = 6  if edulist == 2 & eduy == .
replace eduy = 9  if edulist == 3 & eduy == .
replace eduy = 12 if edulist == 4 & eduy == .
replace eduy = 15 if edulist == 5 & eduy == .
replace eduy = 16 if edulist == 6 & eduy == .
replace eduy = 19 if edulist == 7 & eduy == .
replace eduy = 22 if edulist == 8 & eduy == .

order pid year fid10 fid12 fid14 fid16 fid18
sort pid year
* xtset stops with an error if any pid appears more than once in a year.
xtset pid year

drop if wordtest == . & mathtest == .

save "$workingdata/workingdata.dta", replace
